#!/usr/bin/perl
#file: contrast.pl 
#created: 08-21-2012
#author: Tim Bergsma, copyright (C) 2012 Metrum Research Group, LLC.

#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#as published by the Free Software Foundation; either version 2
#of the License, or (at your option) any later version.

#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.

#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

#purpose: difference.pl compares arguments 1 and 2 (xml) with respect to elements
#described in argument 3 (xpath specifiers). xml-formated output is passed to stdout
#or to argument 4 filename if specified.

# Tasks:
# 1. Parse xml from first argument (stream or filename).
# 2. Parse xml from second argument (stream or filename).
# 3. Parse xpath expressions from third argument (stream or a filename).
# 4. For each xpath expression, seek a value vector in first and second object.
# 5. Format the results in a regular way as xml.
# 6. Print to commandline, or to file if fourth argument is present.

use strict;
use warnings;
use XML::XPath;
use Cwd;

unless (@ARGV >=3){
	die <<EOF;
	
	usage: perl contrast.pl your.xml my.xml xpath.lst [output.xml]

	see http://nmqual.googlecode.com

EOF
}

#parse left file
my $leftFile = shift @ARGV;
my $leftXml;
if (!($leftFile=~m/\n/) && -e $leftFile){
	$leftXml = XML::XPath->new(filename => $leftFile) or die "$!\n";
}else{
	$leftXml = XML::XPath->new(xml => $leftFile) or die "$!\n";
}

#parse right file
my $rightFile = shift @ARGV;
my $rightXml;
if (!($rightFile=~m/\n/) && -e $rightFile){
	$rightXml = XML::XPath->new(filename => $rightFile) or die "$!\n";
}else{
	$rightXml = XML::XPath->new(xml => $rightFile) or die "$!\n";
}

#parse xpath file
my $xpathfile = shift @ARGV;
my @xpath;
if(!($xpathfile=~m/\n/) && -e $xpathfile){
	open(INPUT,$xpathfile);
	@xpath=<INPUT>;
	chomp @xpath;
	close(INPUT);
}else{
	@xpath = split('\n',$xpathfile);
}

#detect output file
my $file;
if(@ARGV){$file=shift @ARGV;}
if(@ARGV) {die "too many arguments\n";}

# At this point, we have two xpath objects to contrast, a list of xpath expressions,
# and possibly a requested output file name. Iterate across the xpath expressions,
# pairing the results.  Print to stdout or file if available.

my @left = doPaths(\@xpath,$leftXml);
my @right = doPaths(\@xpath,$rightXml);
die if @left != @xpath;
die if @left != @right;

# Create pretty xml from @xpath, @left, and @right.
my $xml = logPaths(\@xpath, \@left, \@right);
my $result = $xml->toString;
$result = pretty($result);

# Print to stdout or file if available.
if($file){
	open (OUTFILE, ">$file") or die "can't open $file for writing\n$!\n";
	print OUTFILE $result;
	close OUTFILE;
}else{
	print $result;
}

### subroutines ###

sub doNode{ # always returns text
	my $node = shift;
	my $xml = shift;
	my $type = $node->getNodeType;
	my $textnode;
	my $text;
	if($type==1){ # ELEMENT_NODE
		$textnode = $xml->find('./text()' , $node);
		$text = $textnode->string_value;
	}elsif($type==3){ # TEXT_NODE
		$text = $node->string_value;
	}
	return $text;	
}
			
sub doPath{ # always returns (ref to anonymous) array
	my $path = shift;
	my $xml = shift;
	my @nodes = $xml->findnodes($path)->get_nodelist;
	#return ['no matches'] if (!(@nodes));
	my @vector;
	foreach(@nodes){
		my $text = doNode($_,$xml);
		push @vector, $text;
	}
	return @vector;
}
	
sub doPaths{ # returns (ref to anonymous) array of array(refs)
	my $pathsRef = shift;
	my $xml = shift;
	my @paths = @$pathsRef;
	#my $len = @paths;
	#return[['no paths']] if (!(@paths));
	my @stack;
	foreach(@paths){
		my @vector = doPath($_,$xml);
		push @stack, \@vector;
	}
	return @stack;
}

sub logPaths{ # returns an xpath object
	my $pathsRef = shift;
	my @paths = @$pathsRef;
	my $leftRef = shift;
	my @left = @$leftRef; # array of arrays
	my $rightRef = shift;
	my @right = @$rightRef;
	my $root = XML::XPath::Node::Element->new('contrast');
	for (0..$#paths){
		my $node = logPath($paths[$_],$left[$_],$right[$_]);
		$root->appendChild($node);
	}
	return $root;
}

sub logPath{ # returns an xpath object
	my $path = shift;
	my $leftRef = shift;
	my @left = @$leftRef; # array of scalars
	my $rightRef = shift;
	my @right = @$rightRef;
	my $root = XML::XPath::Node::Element->new('aspect');
	my $id = XML::XPath::Node::Element->new('path');
	my $text = XML::XPath::Node::Text->new($path);
	$id->appendChild($text);
	$root->appendChild($id);
	#attach($root,'/path',$path);
	my $left = logArray('left',\@left);
	my $right = logArray('right',\@right);
	$root->appendChild($left);
	$root->appendChild($right);
	return $root;
}

sub logArray{ # returns an xpath object
	my $name = shift;
	my $arrayRef = shift;
	my @array = @$arrayRef;
	my $root = XML::XPath::Node::Element->new($name);
	for (0..$#array){
		#print @array;
		#print "\n";
		#print $array[$_];
		my $text = $array[$_];
		my $node = XML::XPath::Node::Element->new('val');
		my $child = XML::XPath::Node::Text->new($text);
		$node->appendChild($child);
		$root->appendChild($node);
	}
	return $root;
}

#pretty-prints xml
sub pretty{
	$_ = shift;                       # the file, all as one string
	s">[\n\s]*<"><"g;                 # remove inter-element prettiness
	s"<!--[\n\s]*"<!--"g;             # remove begin-comment prettiness
	s"[\n\s]*-->"-->"g;               # remove end-comment prettiness
	s"(<[^/])"\n$1"g;                 # newline before every begin-element
	s"^\n"";                          # eliminate initial newline
	s"></">\n</"g;                    # newline before successor end-element
	s">-->">\n-->"g;                  # newline before successor end-comment
	s"([^>])\n</"$1</"g;              # drop noninformative newline after text
	s"(<[^/!]+>)\n</"$1</"g;          # fuse empty element
	s"(<[^/! ]+.*)></[^>]+>"$1 />"g;  # collapse empty element
	s"\s/>"/>"g;                      # trim empty element
	my @lines = split('\n',$_);
	my $depth = -1;
	foreach my $line (@lines){
		$depth+=1 if $line=~'^<[^/]';           # increase depth for begin-element
		$line = ( ' ' x $depth ) . $line;       # indent to depth
		$depth-=1 if $line=~'(/>)|(</)|(-->)';  # decrease depth for end-element
	}
	$_ = join "\n", @lines;
	$_ = $_ . "\n";
	$_
}

